library(socviz)
library(lubridate)
library(geofacet)
library(ggthemes)
library(ggrepel)
library(ggridges)
library(plyr)
library(skimr)
library(tidyverse)
library(gganimate)
library(plotly)
# Make the minimal theme the session-wide default for every ggplot drawn below.
ggplot2::theme_set(ggplot2::theme_minimal())
# 2015 report: read the CSV, shorten the verbose column names, and tag every
# row with its survey year (Year is appended as the last column).
h15 <- read_csv("Happiness_Data/2015.csv") %>%
  dplyr::rename(
    H_rank = `Happiness Rank`,
    H_score = `Happiness Score`,
    GDP = `Economy (GDP per Capita)`,
    Health = `Health (Life Expectancy)`,
    Trust = `Trust (Government Corruption)`,
    SE = `Standard Error`,
    dystopia_res = `Dystopia Residual`
  ) %>%
  dplyr::mutate(Year = 2015)
# 2016 report: read the CSV and bring it to the same column names as 2015.
h16 <- read_csv("Happiness_Data/2016.csv") %>%
  dplyr::mutate(
    Year = 2016,
    # The standard error is derived from the confidence bounds:
    # SE = (upper limit - lower limit) / 3.92, which holds for a 95% CI.
    SE = (`Upper Confidence Interval` - `Lower Confidence Interval`) / 3.92
  ) %>%
  # The interval bounds are only needed for the SE computation above.
  dplyr::select(-`Upper Confidence Interval`, -`Lower Confidence Interval`) %>%
  dplyr::rename(
    H_rank = `Happiness Rank`,
    H_score = `Happiness Score`,
    GDP = `Economy (GDP per Capita)`,
    Health = `Health (Life Expectancy)`,
    Trust = `Trust (Government Corruption)`,
    dystopia_res = `Dystopia Residual`
  )

# Country -> Region lookup taken from the 2016 table. The tables read for
# 2017 onwards are merged with it to obtain their Region column.
h_regions <- h16 %>%
  dplyr::select(Country, Region)
# Read 2017 Data
h17 <- read_csv("Happiness_Data/2017.csv")
h17 <- h17 %>%
  dplyr::mutate(
    Year = 2017,
    # SE = (upper whisker - lower whisker) / 3.92, the same 95% CI formula
    # that is applied to the 2016 confidence bounds.
    `Standard Error` = (Whisker.high - Whisker.low) / 3.92
  ) %>%
  # Attach Region from the 2016 lookup. all.x = TRUE keeps every 2017 row;
  # countries with no match in h_regions get Region = NA.
  # Note: base merge() returns a plain data.frame sorted by Country.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::select(-c(Whisker.high, Whisker.low)) %>%
  dplyr::rename(
    H_rank = Happiness.Rank, # Modify variable names
    H_score = Happiness.Score,
    GDP = Economy..GDP.per.Capita.,
    Health = Health..Life.Expectancy.,
    Trust = Trust..Government.Corruption.,
    SE = `Standard Error`,
    dystopia_res = Dystopia.Residual
  )
# Read 2018 Data
h18 <- read_csv("Happiness_Data/2018.csv")
h18 <- h18 %>%
  dplyr::mutate(Year = 2018) %>%
  dplyr::rename(
    H_rank = `Overall rank`, # Modify variable names
    H_score = `Score`,
    GDP = `GDP per capita`,
    Country = `Country or region`,
    Health = `Healthy life expectancy`,
    Trust = `Perceptions of corruption`,
    Freedom = `Freedom to make life choices`,
    Family = `Social support`
  ) %>%
  # Attach Region from the 2016 lookup; unmatched countries get Region = NA.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::mutate(
    # Store Trust itself as numeric instead of coercing it only inside the
    # formula below, so the column has the same type as in the other years
    # when the yearly tables are stacked.
    # NOTE(review): the coercion suggests read_csv parses this column as text
    # (e.g. because of a non-numeric placeholder) - confirm against the CSV.
    # Entries that cannot be parsed become NA, with a coercion warning.
    Trust = as.numeric(Trust),
    # This file has no dystopia residual column: it is reconstructed as the
    # part of the score not explained by the six listed factors.
    dystopia_res = H_score -
      (GDP + Family + Health + Freedom + Generosity + Trust)
  )
# Read 2019 Data
h19 <- read_csv("Happiness_Data/2019.csv")
h19 <- h19 %>%
  dplyr::mutate(Year = 2019) %>%
  dplyr::rename(
    H_rank = `Overall rank`, # Modify variable names
    H_score = `Score`,
    GDP = `GDP per capita`,
    Country = `Country or region`,
    Health = `Healthy life expectancy`,
    Trust = `Perceptions of corruption`,
    Freedom = `Freedom to make life choices`,
    Family = `Social support`
  ) %>%
  # Attach Region from the 2016 lookup; unmatched countries get Region = NA.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::mutate(
    # Keep Trust numeric in the stored column as well as in the formula, so
    # its type is guaranteed when the yearly tables are stacked. This is a
    # no-op when read_csv already parsed the column as numeric.
    Trust = as.numeric(Trust),
    # This file has no dystopia residual column: it is reconstructed as the
    # part of the score not explained by the six listed factors.
    dystopia_res = H_score -
      (GDP + Family + Health + Freedom + Generosity + Trust)
  )
# Stack the five yearly tables into h_alldat. rbind.fill() fills columns that
# a given year lacks with NA. Country is lower-cased, and Country and Region
# are stored as factors.
h_alldat <- tibble(rbind.fill(h15, h16, h17, h18, h19)) %>%
  dplyr::mutate(
    Country = as.factor(tolower(Country)),
    Region = as.factor(Region)
  )
#rmarkdown::paged_table(h_alldat)

# Summary statistics of the happiness rank per region, rendered as a table.
papeR::summarize_numeric(
  h_alldat,
  type = "numeric",
  group = "Region",
  variables = c("H_rank"),
  test = FALSE
) %>%
  knitr::kable()
# Rendered output of the knitr::kable() call above (H_rank summarised by
# Region), kept as a comment so that the script still parses:
#
# |     |        | Region                          | N   | Mean   | SD    | Min | Q1    | Median | Q3    | Max |
# |-----|--------|---------------------------------|-----|--------|-------|-----|-------|--------|-------|-----|
# | 1   | H_rank | Australia and New Zealand       | 10  | 9.10   | 1.10  | 8   | 8.0   | 9.0    | 10.0  | 11  |
# | 1.1 |        | Central and Eastern Europe      | 144 | 75.63  | 26.80 | 20  | 55.5  | 73.0   | 91.5  | 138 |
# | 1.2 |        | Eastern Asia                    | 28  | 66.32  | 22.54 | 25  | 52.0  | 65.0   | 83.5  | 101 |
# | 1.3 |        | Latin America and Caribbean     | 109 | 49.75  | 29.97 | 12  | 28.0  | 43.0   | 63.0  | 148 |
# | 1.4 |        | Middle East and Northern Africa | 96  | 79.56  | 41.40 | 11  | 39.0  | 83.0   | 109.0 | 156 |
# | 1.5 |        | North America                   | 10  | 11.30  | 5.14  | 5   | 7.0   | 11.0   | 15.0  | 19  |
# | 1.6 |        | Southeastern Asia               | 44  | 80.55  | 35.45 | 22  | 46.5  | 81.5   | 107.0 | 145 |
# | 1.7 |        | Southern Asia                   | 35  | 112.46 | 23.31 | 67  | 97.0  | 115.0  | 127.5 | 154 |
# | 1.8 |        | Sub-Saharan Africa              | 185 | 126.86 | 21.41 | 55  | 114.0 | 131.0  | 143.0 | 158 |
# | 1.9 |        | Western Europe                  | 103 | 26.12  | 26.36 | 1   | 6.0   | 17.0   | 36.0  | 102 |
# Read the deaths-by-risk-factor data. The path is relative to the project
# root, like the yearly happiness files, instead of a machine-specific
# absolute path, so the script also runs on other machines.
death_dat <- read_csv("Happiness_Data/number-of-deaths-by-risk-factor.csv")
# Keep only records after 2015 and order them chronologically.
# NOTE(review): `Year > 2015` excludes 2015 itself although the happiness
# data starts in 2015 - confirm that this is intended.
death_dat <- death_dat %>%
  dplyr::filter(Year > 2015) %>%
  dplyr::arrange(Year)
rmarkdown::paged_table(death_dat)
# Read the country profile variables. The path is relative to the project
# root, like the yearly happiness files, instead of a machine-specific
# absolute path, so the script also runs on other machines.
country_profile <- read_csv(
  "Happiness_Data/kiva_country_profile_variables.csv"
)
country_profile <- country_profile %>%
  # Lower-case the country names so they match the lower-cased Country key
  # of h_alldat, then give the key column the same name.
  dplyr::mutate(country = tolower(country)) %>%
  dplyr::rename(Country = country) %>%
  # Drop this table's own Region column before merging with h_alldat,
  # which already has one.
  dplyr::select(-c(Region))
# merge() with its defaults keeps only countries present in both tables.
h_p_alldat <- merge(h_alldat, country_profile, by = "Country")
rmarkdown::paged_table(country_profile)
# Countries whose mean happiness rank over 2015 ~ 2019 is within the top 10.
# A lower rank is better, so the table is sorted ascending to list the best
# country first (sorting with desc() put the worst of the group on top).
top_10 <- h_alldat %>%
  dplyr::group_by(Country) %>%
  dplyr::summarise(mean_rank = mean(H_rank)) %>%
  dplyr::filter(mean_rank <= 10) %>%
  dplyr::arrange(mean_rank)
rmarkdown::paged_table(top_10)
# Distribution of happiness scores per region.
# Rows without a region are dropped with is.na(): comparing against the
# string "NA" only removed them because NA != "NA" evaluates to NA.
ggplot(dplyr::filter(h_alldat, !is.na(Region))) +
  # reorder() sorts the regions by their mean H_score (its default summary).
  geom_boxplot(
    aes(x = H_score, y = reorder(Region, H_score), color = Region)
  ) +
  theme_classic() +
  # "none" (lower case) is the documented value for hiding the legend.
  theme(legend.position = "none") +
  labs(x = "Happiness Scores", y = "Regions")

# Happiness score against GDP, one point per country-year, coloured by region.
# Rows without a region are dropped with is.na(): comparing against the
# string "NA" only removed them because NA != "NA" evaluates to NA.
ggplot(
  dplyr::filter(h_alldat, !is.na(Region)),
  aes(x = GDP, y = H_score, color = Region)
) +
  geom_point() +
  theme_classic()

# World polygon outlines, keyed by a lower-cased Country column so they can
# be joined with h_alldat.
world_map <- map_data("world")
world <- world_map %>%
  dplyr::rename(Country = region) %>%
  dplyr::mutate(
    Country = str_to_lower(Country),
    # Rename three map regions; every other name is kept unchanged.
    Country = dplyr::case_when(
      Country == "usa" ~ "united states",
      Country == "democratic republic of the congo" ~ "congo (kinshasa)",
      Country == "republic of congo" ~ "congo (brazzaville)",
      TRUE ~ Country
    ),
    Country = as.factor(Country)
  )
# Attach the map polygons to every country-year row. left_join() already
# keeps all rows of h_alldat; `all.x` is an argument of base merge(), not of
# left_join(), so it is dropped rather than passed through the join's dots.
h_alldat_world <- dplyr::left_join(h_alldat, world, by = "Country")
# Base plot specification only: no geom layer is added here, so this draws
# an empty panel.
ggplot(
  data = h_alldat_world,
  mapping = aes(x = long, y = lat, group = group, fill = H_score)
)

# World map filled by happiness score (white = low, orange = high).
# `frame = Year` is handed on to ggplotly(), which uses it to animate the
# map by year.
p <- ggplot(
  data = h_alldat_world,
  mapping = aes(x = long, y = lat, group = group, frame = Year)
) +
  geom_polygon(mapping = aes(fill = H_score), alpha = 1, na.rm = TRUE) +
  scale_fill_gradient(low = "white", high = "#FD8104", na.value = NA) +
  theme_map()
# Convert the static ggplot into an interactive plotly widget.
plotly::ggplotly(p)